Load in CSV data

age_interval <- 1

naming_data <- read_csv("data/naming_colors_participants.csv") %>%
  left_join(read_csv("data/naming_colors_data.csv"), by = 'subj')

grouping_data <- read_csv("data/grouping_colors_participants.csv") %>%
  left_join(read_csv("data/grouping_colors_data.csv"), by = 'subj')

shipibo_child_data <- read_csv("data/shipibo_children_colors_participants.csv") %>%
  left_join(read_csv("data/shipibo_children_colors_data.csv"), by = 'subj')

spanish_child_data <- read_csv("data/spanish_children_colors_participants.csv") %>%
  left_join(read_csv("data/spanish_children_colors_data.csv"), by = 'subj')

color_chip_data <- read_csv("data/wcs_measures.csv", skip = 1)


string_spelling_list <- "`Ami` = c('ami'), `Ambi` = c('ambi'), `Barin Poi` = c('barin pui', 'barrin pui', 'barrinpui', 'pui', 'barin poi', 'barrin poi', 'bavrinpui*', 'barri'), `Bexnan` = c('berrnan', 'bexna', 'bexnan'), `Kari` = c('cari', 'carri', 'kari', 'karri'), `Chexe` = c('chese', 'chexe'), `Chimapo` = c('chimapu'), `Emo` = c('emu'), `Jimi` = c('jimi'), `Jisa` = c('jisa'), `Joshin` = c('joshin', 'joxin', 'toshin'), `Joxo` = c('josho', 'joxo'), `Kasho` = c('kashos'), `Keskiti` = c('kex keti'), `Koin` = c('kuin'), `Kononbi` = c('kunumbi'), `Konron` = c('korrum', 'kumrrum', 'kunrrum'), `Koro` = c('coro'), `Mai` = c('mai'), `Mandi` = c('mandi'), `Manxan` = c('manrran', 'manshan', 'manxam', 'manxan', 'maxan', 'maxna'), `Maxe` = c('maxe'), `Nai` = c('nai', 'nia'), `Oxne` = c('oshne'), `Pei` = c('pei'), `Poa` = c('pua'), `Pene` = c('pene'), `Panshin` = c('panshin'), `Pasna` = c('paxsna', 'pasna'), `Paxna` = c('parrna', 'paxna'), `Ranchesh` = c('ranchex'), `Spanish Term` = c('rojo', 'blanco', 'verde', 'amarillo', 'celeste', 'negro', 'morado', 'azul', 'marron', 'bioleta', 'verdesito', 'carne', 'naranjada', 'naranjado', 'amarilla', 'agua', 'agur', 'uva color*', 'violeta', 'pasto payota', 'naranja', 'chocolate', 'rosado', 'rosada', 'narranxa', 'anaranjado', 'coral', 'cerde', 'gris', 'oscuro', 'lila', 'azu', 'color cielo', 'cielo'), `Tena` = c('tena'), `Wiso` = c('wiso'), `Xena` = c('xena'), `Xo` = c('xo'), `Xexe` = c('xexe', 'xexi'), `Yame` = c('rayame', 'yame'), `Yankon` = c('rayanko', 'yankom', 'yankon', 'yankum', 'yankun', 'yankontani', 'yakon', 'yakun', 'yankoncha'), `NA` = c(NA)"

spelling_list <- eval(parse(text = paste0("c(",string_spelling_list,")")))

naming_data %<>%
  mutate(color_cat = ifelse(is.na(color_cat), first_response, color_cat)) %>%
  mutate(color_cat = ifelse(color_cat %in% unlist(spelling_list), color_cat, NA)) %>%
  mutate(color_cat = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(color_cat, x)")))
         )

grouping_data %<>%
  mutate(`nombre del grupo` = ifelse(`nombre del grupo` %in% unlist(spelling_list), 
                                     `nombre del grupo`, NA)) %>%
  mutate(`nombre del grupo` = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(`nombre del grupo`, x)")))
         )

color_chip_data %<>%
  mutate(hex = colorspace::hex(
    colorspace::LAB(color_chip_data$`L*`, color_chip_data$`a*`, 
                    color_chip_data$`b*`, color_chip_data$`#cnum`), fixup = T))

Which terms appear to be basic and commonly used?

naming_data_profusion <- naming_data %>%
  group_by(subj, color_cat) %>%
  summarise(n = n()) %>%
  group_by(color_cat) %>%
  spread(subj, n, fill = 0) %>%
  gather(key = 'subj', value = 'n', -color_cat) %>%
  summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(), 
            `Mean % of Chips in Set Labeled` = 100*mean(n)/165) %>%
  dplyr::rename(`Color Term` = color_cat)

naming_list <- as.character(na.omit(filter(naming_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))

datatable(naming_data_profusion, rownames = FALSE)

In the naming task with 165 color chips, commonly used terms include:

num_groups <- grouping_data %>%
  filter(task == 1) %>%
  group_by(subj) %>%
  summarise(`# of Groups` = n_distinct(`nombre del grupo`)) %>%
  ungroup() %>%
  summarise(`Avg # of Groups` = mean(`# of Groups`), 
            `Min # of Groups` = min(`# of Groups`),
            `Max # of Groups` = max(`# of Groups`))

grouping_data_profusion <- grouping_data %>%
  filter(task == 1) %>%
  group_by(subj, `nombre del grupo`) %>%
  summarise(`cuantas tarjetas` = mean(`cuantas tarjetas`)) %>%
  group_by(`nombre del grupo`) %>%
  spread(subj, `cuantas tarjetas`, fill = 0) %>%
  gather(key = 'subj', value = 'n', -`nombre del grupo`) %>%
  summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(), 
            `Mean % of Chips in Set Labeled` = 100*mean(n)/60) %>%
  dplyr::rename(`Color Term` = `nombre del grupo`)

grouping_list <- as.character(na.omit(filter(grouping_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))

datatable(grouping_data_profusion, rownames = FALSE)

In the grouping task with 60 chips, subjects usually create between 4-7 groups and mostly use terms like:

For each color chip, how many adults label it with the same term?

consensus <- 75

naming_consensus <- naming_data %>%
  select(subj, chip_id, color_cat) %>%
  mutate(set = ifelse((chip_id %% 2) == 0, 'even', 'odd')) %>%
  split(.$set) %>%
  map_df(function(x) {
    x %>%
    group_by(chip_id, color_cat) %>%
    summarise(n = n()) %>%
    group_by(chip_id) %>%
    mutate(perc = 100*n/sum(n)) %>%
    select(-n)
  }) %>%
  arrange(chip_id) %>%
  rename(`Chip ID` = chip_id, `Color Term` = color_cat, `% of Subjects` = perc)

datatable(naming_consensus %>%
            spread(`Color Term`, `% of Subjects`, fill = 0), 
  rownames = FALSE, fillContainer = TRUE)
focal_terms <- pander::p(as.character(
  unique(filter(naming_consensus,`% of Subjects` >= consensus)$`Color Term`)), 
  wrap = '', copula = ', and ')

color_chip_hexes <- color_chip_data[, c('#cnum', 'hex')]


highest_chips <- (naming_consensus %>% group_by(`Color Term`) %>%
  filter(`% of Subjects` >= consensus & `% of Subjects` == max(`% of Subjects`)))$`Chip ID`

agreed_chips <- naming_consensus %>%
  group_by(`Color Term`) %>%
  filter(`% of Subjects` >= consensus) %>%
  arrange(`Color Term`, `Chip ID`) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  dplyr::rename(`Hex Code` = hex) %>%
  mutate(highest_chips = ifelse(`Chip ID` %in% highest_chips, 1, 0))


datatable(agreed_chips, rownames = FALSE,
          options=list(columnDefs = list(list(
            visible=FALSE, targets=c(grep('highest_chips', names(agreed_chips))-1))))) %>%
  formatStyle('highest_chips', target = 'row',
              fontWeight = styleEqual(c(0,1), c('normal','bold'))) %>%
  formatStyle(columns = "Hex Code",
              background = styleEqual(agreed_chips$`Hex Code`, agreed_chips$`Hex Code`))

The only categories with chips that reach a high level of consensus appear to be Yankon, Joshin, Panshin, Joxo, and Wiso

Is there a similar amount of consensus on labeling between children and adults (in Shipibo)?

chip_set <- as.numeric(grep(pattern = "^[0-9]+$", unique(shipibo_child_data$response_1), value = T))

shipibo_1st_response <- shipibo_child_data %>%
  mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
  filter(task == 1) %>%
  mutate(response_1 = ifelse(response_1 %in% unlist(spelling_list), 
                                     response_1, NA)) %>%
  mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list,
                                                      "forcats::fct_collapse(response_1, x)")))
         ) %>%
  mutate( age_ints = round(age/age_interval)*age_interval) %>%
  select(subj, age, age_ints, prompt, response_1) %>%
  split(.$age_ints) %>%
  map_df(function(x) {
    x %>%
      mutate(response_1 = as.character(response_1)) %>%
      spread(prompt, response_1, fill = 'No Response') %>%
      gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
      group_by(age_ints, prompt, response) %>%
      summarise(n = n()) %>%
      group_by(age_ints, prompt) %>%
      mutate(perc = 100*n/sum(n), n_total = sum(n))
  }) %>% ungroup() %>%
  mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
                                        `1` = c('celeste'),
                                        `234` = c('verde'),
                                        `245` = c('rojo'),
                                        `274` = c('blanco'),
                                        `297` = c('amarillo'),
                                        `312` = c('negro'),
                                        `320` = c('mierda sol'),
                                        `325` = c('morado'))))) %>%
  left_join(color_chip_hexes,
            by = c("prompt" = "#cnum")) %>%
  rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response, 
         `% of Subjects` = perc, `Hex Code` = hex) %>%
  filter(n_total >= 4)
  
  
graph_colors <- c(
  'Ambi' = '#874A8C',
  'Ami' = '#76296E',
  'Barin Poi' = '#6D6212',
  'Bexnan' = '#B6D744',
  'Chexe' = '#81C147',
  'Chimapo' = '#003459',
  'Emo' = '#007177',
  'Jimi' = '#822158',
  'Joshin' = '#BC1E47',
  'Joxo' = '#F3F3F3',
  'Kari' = '#571848',
  'Kasho' = '#F07000',
  'Keskiti' = '#E56F92',
  'Koin' = '#50491D',
  'Kononbi' = '#503B87',
  'Konron' = '#BB8F00',
  'Koro' = '#7B7B7B',
  'Mai' = '#7F5A21',
  'Mandi' = '#005637',
  'Manxan' = '#FEBBA1',
  'Maxe' = '#DC4800',
  'Nai' = '#19A2C2',
  'Oxne' = '#66BCC9',
  'Panshin' = '#EDC800',
  'Pasna' = '#D3C5DF',
  'Paxna' = '#EC99A2',
  'Pei' = '#69C360',
  'Pene' = '#55471E',
  'Poa' = '#7E4E94',
  'Ranchesh' = '#4A2347',
  'Tena' = '#C5D500',
  'Yame' = '#666412',
  'Yankon' = '#00A79E',
  'Wiso' = '#272727',
  'Xena' = '#D4799C',
  'Xexe' = '#9769AE',
  'Xo' = '#3A6E14',
  'Spanish Term' = '#FF6E00'
)


adult_naming <- naming_consensus %>%
  group_by(`Color Term`) %>%
  mutate(Age = 18) %>%
  arrange(`Chip ID`, `Color Term`) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  filter(`Chip ID` %in% chip_set & !is.na(`Color Term`)) %>%
  dplyr::rename(`Hex Code` = hex)

naming_data_combined <- bind_rows(shipibo_1st_response, adult_naming)


term_prototypes <- naming_consensus %>%
  group_by(`Color Term`) %>%
  dplyr::arrange(`Color Term`, desc(`% of Subjects`)) %>%
  slice(1:3) %>%
  left_join(color_chip_hexes,
            by = c("Chip ID" = "#cnum")) %>%
  dplyr::rename(`Hex Code` = hex)

chip_set_data <- color_chip_data %>% 
  filter(`#cnum` %in% chip_set) %>%
  select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
  arrange(`#cnum`) %>%
  rename(`Chip ID` = `#cnum`, `Hex Code` = hex)

datatable(chip_set_data, rownames = FALSE) %>%
  formatStyle(columns = "Hex Code",
              background = styleEqual(chip_set_data$`Hex Code`, chip_set_data$`Hex Code`))
p <- ggplot(filter(naming_data_combined, Age < 18), 
       aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
  facet_wrap(~`Chip ID`) +
  geom_line(size = 1) +
  geom_point( size=3) +
  geom_point(data = filter(naming_data_combined, Age >= 18), size=3) +
  scale_x_continuous(breaks = c(seq(6,12,2),18), labels = c(seq(6,12,2),'Adult')) +
  scale_colour_manual(name = "Color Term",values = graph_colors) +
  theme(panel.grid = element_blank())

ggplotly(p)